Preparing the environment

Load the required packages:

library(tidyverse)
library(glue)
library(knitr)
library(Matrix)
library(scRNAseq)
library(scater)
library(scran)
library(PCAtools)
library(dittoSeq)
library(Nebulosa)
library(pheatmap)
library(viridis)

Preparing the data

# Load the Baron et al. (2016) human pancreas dataset
sce <- BaronPancreasData('human')
sce
## class: SingleCellExperiment 
## dim: 20125 8569 
## metadata(0):
## assays(1): counts
## rownames(20125): A1BG A1CF ... ZZZ3 pk
## rowData names(0):
## colnames(8569): human1_lib1.final_cell_0001 human1_lib1.final_cell_0002
##   ... human4_lib3.final_cell_0700 human4_lib3.final_cell_0701
## colData names(2): donor label
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# Cell metadata
colData(sce)
## DataFrame with 8569 rows and 2 columns
##                                   donor              label
##                             <character>        <character>
## human1_lib1.final_cell_0001  GSM2230757             acinar
## human1_lib1.final_cell_0002  GSM2230757             acinar
## human1_lib1.final_cell_0003  GSM2230757             acinar
## human1_lib1.final_cell_0004  GSM2230757             acinar
## human1_lib1.final_cell_0005  GSM2230757             acinar
## ...                                 ...                ...
## human4_lib3.final_cell_0697  GSM2230760 activated_stellate
## human4_lib3.final_cell_0698  GSM2230760              alpha
## human4_lib3.final_cell_0699  GSM2230760               beta
## human4_lib3.final_cell_0700  GSM2230760               beta
## human4_lib3.final_cell_0701  GSM2230760             ductal
# Cell type summary
sce$label %>%
    table() %>%
    enframe() %>%
    arrange(desc(value))
name value
beta 2525
alpha 2326
ductal 1077
acinar 958
delta 601
activated_stellate 284
gamma 255
endothelial 252
quiescent_stellate 173
macrophage 55
mast 25
epsilon 18
schwann 13
t_cell 7
# Remove cell types with fewer than 100 cells
cell_types_to_keep <- sce$label %>%
    table() %>%
    enframe() %>%
    filter(value >= 100) %>%
    pull(name)
sce <- sce[, sce$label %in% cell_types_to_keep]
sce
## class: SingleCellExperiment 
## dim: 20125 8451 
## metadata(0):
## assays(1): counts
## rownames(20125): A1BG A1CF ... ZZZ3 pk
## rowData names(0):
## colnames(8451): human1_lib1.final_cell_0001 human1_lib1.final_cell_0002
##   ... human4_lib3.final_cell_0700 human4_lib3.final_cell_0701
## colData names(2): donor label
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# Order cell types by freqency
sce$label <- fct_infreq(sce$label)
colData(sce)
## DataFrame with 8451 rows and 2 columns
##                                   donor              label
##                             <character>           <factor>
## human1_lib1.final_cell_0001  GSM2230757             acinar
## human1_lib1.final_cell_0002  GSM2230757             acinar
## human1_lib1.final_cell_0003  GSM2230757             acinar
## human1_lib1.final_cell_0004  GSM2230757             acinar
## human1_lib1.final_cell_0005  GSM2230757             acinar
## ...                                 ...                ...
## human4_lib3.final_cell_0697  GSM2230760 activated_stellate
## human4_lib3.final_cell_0698  GSM2230760 alpha             
## human4_lib3.final_cell_0699  GSM2230760 beta              
## human4_lib3.final_cell_0700  GSM2230760 beta              
## human4_lib3.final_cell_0701  GSM2230760 ductal

Data normalization

# Solution 1: library sizes (i.e. total sum of counts per cell)
sce <- computeLibraryFactors(sce)
summary(sizeFactors(sce))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2052  0.6259  0.8707  1.0000  1.2018  5.8971
# Solution 2: deconvolution strategy implemented in scran
quick_clusters <- quickCluster(sce)
table(quick_clusters)
## quick_clusters
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
##  580  777 1190  934 1384  292  276  594  299  247  887  313  525  153
sce <- computeSumFactors(sce, clusters = quick_clusters)
summary(sizeFactors(sce))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2373  0.6566  0.9239  1.0000  1.2380  5.5974
# Calculate log-transformed normalized counts
sce <- logNormCounts(sce)
sce
## class: SingleCellExperiment 
## dim: 20125 8451 
## metadata(0):
## assays(2): counts logcounts
## rownames(20125): A1BG A1CF ... ZZZ3 pk
## rowData names(0):
## colnames(8451): human1_lib1.final_cell_0001 human1_lib1.final_cell_0002
##   ... human4_lib3.final_cell_0700 human4_lib3.final_cell_0701
## colData names(3): donor label sizeFactor
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Selecting highly variable genes

# Model gene variance
dec <- modelGeneVar(sce)
dec
## DataFrame with 20125 rows and 6 columns
##              mean      total       tech          bio   p.value       FDR
##         <numeric>  <numeric>  <numeric>    <numeric> <numeric> <numeric>
## A1BG   0.00472055 0.00538423 0.00540704 -2.28144e-05 0.5102683  0.787099
## A1CF   0.20494779 0.24831971 0.22739429  2.09254e-02 0.2872580  0.787099
## A2M    0.03861706 0.05787503 0.04415246  1.37226e-02 0.0289714  0.404091
## A2ML1  0.00000000 0.00000000 0.00000000  0.00000e+00       NaN       NaN
## A4GALT 0.05872583 0.07569261 0.06702968  8.66293e-03 0.2152109  0.787099
## ...           ...        ...        ...          ...       ...       ...
## ZYG11B   0.182301   0.201515   0.203401  -0.00188619  0.522558  0.787099
## ZYX      0.440804   0.533917   0.456515   0.07740244  0.150475  0.787099
## ZZEF1    0.159178   0.176972   0.178577  -0.00160495  0.521863  0.787099
## ZZZ3     0.112700   0.130571   0.127730   0.00284117  0.446028  0.787099
## pk       0.177232   0.237329   0.197988   0.03934163  0.112703  0.787099
dec %>%
    as.data.frame() %>%
    ggplot(mapping = aes(x = mean, y = total)) +
    geom_point() +
    geom_smooth() +
    labs(
        title = "Modelling gene variance",
        x = "Mean log-expression",
        y = "Total variance"
    ) +
    theme_bw()

# Select the top 2000 variable genes
top_hvgs <- getTopHVGs(dec, n = 2000)
length(top_hvgs)
## [1] 2000
# Select the top 10% of variable genes
top_hvgs <- getTopHVGs(dec, prop = 0.1)
length(top_hvgs)
## [1] 832
# Select all variable genes with FDR below 5%
top_hvgs <- getTopHVGs(dec, fdr.threshold = 0.05)
length(top_hvgs)
## [1] 817

Principal component analysis

# Set the RNG seed for reproducible results
set.seed(42)
# Solution 1: run the PCA and use scree plot to find the number of significant PCs
sce <- runPCA(sce, ncomponents = 50, subset_row = top_hvgs)
sce
## class: SingleCellExperiment 
## dim: 20125 8451 
## metadata(0):
## assays(2): counts logcounts
## rownames(20125): A1BG A1CF ... ZZZ3 pk
## rowData names(0):
## colnames(8451): human1_lib1.final_cell_0001 human1_lib1.final_cell_0002
##   ... human4_lib3.final_cell_0700 human4_lib3.final_cell_0701
## colData names(3): donor label sizeFactor
## reducedDimNames(1): PCA
## mainExpName: NULL
## altExpNames(0):
percent_var <- attr(reducedDim(sce, "PCA"), "percentVar")
pca_elbow <- findElbowPoint(percent_var)
pca_elbow
## [1] 6
ggplot(mapping = aes(x = seq_along(percent_var), y = percent_var)) +
    geom_point() +
    geom_vline(xintercept = pca_elbow, col = "blue") +
    labs(
        title = "Scree plot",
        x = "PC",
        y = "Variance explained (%)"
    ) +
    theme_bw()

reducedDim(sce, "PCA") <- reducedDim(sce, "PCA")[, 1:pca_elbow, drop = FALSE]
ncol(reducedDim(sce, "PCA"))
## [1] 6
# Solution 2: run the PCA and remove PCs corresponding to technical noise
sce <- denoisePCA(sce, dec, subset.row = top_hvgs, min.rank = 5, max.rank = 50)
ncol(reducedDim(sce, "PCA"))
## [1] 11

Dimensionality reduction by UMAP

# Set the RNG seed for reproducible results
set.seed(42)
# Run UMAP
sce <- runUMAP(sce, dimred = "PCA")
# UMAP plot colored by cell type
dittoDimPlot(sce, "label", reduction.use = "UMAP", size = 0.5,
             main = "UMAP plot colored by cell type", legend.show = FALSE,
             do.label = TRUE, labels.highlight = FALSE, labels.size = 4)

Marker gene detection

Detecting marker genes

markers <- findMarkers(sce, groups = sce$label, test.type = "t",
                       pval.type = "all", direction = "up", lfc = 1)
markers
## List of length 9
## names(9): beta alpha ductal acinar ... gamma endothelial quiescent_stellate
head(markers[["alpha"]])
## DataFrame with 6 rows and 11 columns
##            p.value          FDR summary.logFC logFC.beta logFC.ductal
##          <numeric>    <numeric>     <numeric>  <numeric>    <numeric>
## GCG   5.98541e-122 1.20456e-117       7.49431    7.37235      7.40717
## TTR    6.14014e-86  6.17851e-82       3.55940    4.81060      7.21071
## VGF    2.22481e-10  1.49247e-06       1.70395    1.45385      3.71236
## IRX2   2.41172e-09  1.21340e-05       1.30156    1.53550      1.53245
## MUC13  4.77561e-05  1.92218e-01       1.25263    1.68502      1.36584
## GC     4.84802e-04  1.00000e+00       1.20531    1.83950      1.72148
##       logFC.acinar logFC.delta logFC.activated_stellate logFC.gamma
##          <numeric>   <numeric>                <numeric>   <numeric>
## GCG        7.85966     7.14584                  7.50722     7.16743
## TTR        7.37206     5.12772                  7.17617     3.55940
## VGF        3.78697     2.71854                  3.70799     1.70395
## IRX2       1.53669     1.53905                  1.48849     1.49181
## MUC13      1.69684     1.39554                  1.70439     1.25263
## GC         1.84637     1.84548                  1.83764     1.20531
##       logFC.endothelial logFC.quiescent_stellate
##               <numeric>                <numeric>
## GCG             7.64652                  7.49431
## TTR             7.24128                  6.98467
## VGF             3.67150                  3.66884
## IRX2            1.51647                  1.30156
## MUC13           1.68936                  1.68033
## GC              1.82979                  1.80168

Top marker for each cell type

top_markers <- lapply(markers, function(x) rownames(x)[1]) %>%
    unlist() %>%
    enframe()
top_markers
name value
beta INS
alpha GCG
ductal KRT19
acinar CPA2
delta SST
activated_stellate COL6A3
gamma PPY
endothelial PLVAP
quiescent_stellate IL24

Top marker gene tables

for (cell_type in levels(sce$label)) {
    cat("\n\n###", cell_type, "\n")
    markers[[cell_type]] %>%
        as.data.frame() %>%
        head(n = 5) %>%
        t() %>%
        kable() %>%
        print()
}

beta

INS IAPP ADCYAP1 DLK1 CDKN1C
p.value 0.000000 0.000000 0.000000 0.000000 0.0000466
FDR 0.000000 0.000000 0.000000 0.000000 0.1874358
summary.logFC 6.494641 4.596394 1.750503 1.612272 1.3600120
logFC.alpha 7.345333 5.436746 1.888202 1.651457 1.4956267
logFC.ductal 7.920327 5.623030 1.882349 1.612369 1.7272021
logFC.acinar 7.707704 5.643644 1.883973 1.696437 2.0527564
logFC.delta 6.286670 4.697971 1.878234 1.611269 1.5318399
logFC.activated_stellate 7.707154 5.416104 1.885090 1.612272 1.7205630
logFC.gamma 7.071032 5.093481 1.750503 1.632269 1.5936136
logFC.endothelial 6.421451 4.687363 1.854243 1.677804 1.6434364
logFC.quiescent_stellate 6.494641 4.596394 1.836874 1.653976 1.3600120

alpha

GCG TTR VGF IRX2 MUC13
p.value 0.000000 0.000000 0.0000000 0.0000000 0.0000478
FDR 0.000000 0.000000 0.0000015 0.0000121 0.1922183
summary.logFC 7.494313 3.559397 1.7039548 1.3015621 1.2526301
logFC.beta 7.372355 4.810596 1.4538461 1.5354968 1.6850151
logFC.ductal 7.407170 7.210706 3.7123605 1.5324537 1.3658426
logFC.acinar 7.859658 7.372062 3.7869696 1.5366916 1.6968356
logFC.delta 7.145837 5.127721 2.7185375 1.5390544 1.3955404
logFC.activated_stellate 7.507217 7.176168 3.7079909 1.4884898 1.7043892
logFC.gamma 7.167429 3.559397 1.7039548 1.4918066 1.2526301
logFC.endothelial 7.646524 7.241281 3.6715012 1.5164730 1.6893638
logFC.quiescent_stellate 7.494313 6.984668 3.6688426 1.3015621 1.6803336

ductal

KRT19 ANXA4 TACSTD2 MMP7 KRT7
p.value 0.000000 0.000000 0.000000 0.000000 0.000000
FDR 0.000000 0.000000 0.000000 0.000000 0.000000
summary.logFC 2.317691 1.543632 1.555283 1.503497 1.566896
logFC.beta 2.899568 1.772241 2.866554 1.534856 2.867409
logFC.alpha 2.792522 1.600266 2.869646 1.535286 2.869428
logFC.acinar 2.317691 1.543632 1.555283 1.494941 1.566896
logFC.delta 2.763065 1.669725 2.861714 1.530507 2.867082
logFC.activated_stellate 2.917943 1.735212 2.855663 1.535200 2.883869
logFC.gamma 2.867642 1.632090 2.853197 1.542536 2.880231
logFC.endothelial 2.913399 1.701446 2.865230 1.532609 2.882058
logFC.quiescent_stellate 2.890150 1.738369 2.833198 1.503497 2.867693

acinar

CPA2 PLA2G1B CTRC PNLIPRP1 CELA2A
p.value 0.000000 0.000000 0.000000 0.000000 0.000000
FDR 0.000000 0.000000 0.000000 0.000000 0.000000
summary.logFC 4.973581 6.433526 4.903089 4.232669 5.492671
logFC.beta 4.962105 6.387992 4.904307 4.217944 5.514975
logFC.alpha 4.950202 6.394214 4.884335 4.216155 5.492671
logFC.ductal 4.629703 6.098158 4.728920 4.116805 5.380444
logFC.delta 4.962614 6.378387 4.892876 4.211313 5.509913
logFC.activated_stellate 4.988029 6.492478 4.944764 4.239821 5.553757
logFC.gamma 4.996565 6.478299 4.939334 4.243920 5.556017
logFC.endothelial 4.965730 6.375447 4.934228 4.227130 5.561614
logFC.quiescent_stellate 4.973581 6.433526 4.903089 4.232669 5.577627

delta

SST RBP4 SEC11C LEPR RGS2
p.value 0.000000 0.000000 0.108874 0.223117 0.5277632
FDR 0.000000 0.000000 1.000000 1.000000 1.0000000
summary.logFC 7.937963 2.493718 1.100166 1.034141 0.9949392
logFC.beta 8.299511 2.493718 1.118113 1.068027 1.1310952
logFC.alpha 9.135310 3.656415 1.830928 1.075711 1.0542434
logFC.ductal 9.207961 3.536460 2.251563 1.065786 1.2116545
logFC.acinar 9.552325 3.647464 2.198402 1.078907 1.0186866
logFC.activated_stellate 9.070288 3.614390 2.515996 1.065576 1.1901639
logFC.gamma 8.345367 3.650838 1.100166 1.082268 0.9949392
logFC.endothelial 8.225952 3.583831 2.429244 1.034141 1.3200028
logFC.quiescent_stellate 7.937963 3.577324 2.295991 1.085346 1.3282833

activated_stellate

COL6A3 SFRP2 COL1A1 MMP14 CRLF1
p.value 0.000000 0.000000 0.000000 0.000000 0.000000
FDR 0.000000 0.000000 0.000000 0.000000 0.000000
summary.logFC 3.265831 2.148280 2.930628 1.903396 1.746462
logFC.beta 3.528541 2.196598 4.992133 2.701599 1.939933
logFC.alpha 3.530632 2.199120 4.995548 2.703271 1.937221
logFC.ductal 3.529205 2.196201 4.807298 2.087105 1.939462
logFC.acinar 3.517469 2.192601 4.939524 2.499019 1.935619
logFC.delta 3.526674 2.198532 4.969223 2.707341 1.940746
logFC.gamma 3.539991 2.192394 4.971147 2.704148 1.940746
logFC.endothelial 3.466527 2.181544 4.788247 1.965494 1.908349
logFC.quiescent_stellate 3.265831 2.148280 2.930628 1.903396 1.746462

gamma

PPY AQP3 STMN2 PAX6 FXYD2
p.value 0.000000 0.042445 0.7296890 0.7680164 0.9764717
FDR 0.000000 1.000000 1.0000000 1.0000000 1.0000000
summary.logFC 8.911443 1.226774 0.9540215 0.9453047 0.8450666
logFC.beta 8.911443 2.497311 1.2411217 0.9453047 0.8450666
logFC.alpha 8.877287 2.503969 0.9540215 1.0616249 1.7410818
logFC.ductal 9.040499 2.475611 1.7810516 2.5250825 1.6878714
logFC.acinar 9.146109 2.493111 1.7987651 2.5612699 2.7464560
logFC.delta 8.869794 1.226774 1.4884268 1.7488264 1.3541694
logFC.activated_stellate 9.003884 2.923554 1.7361047 2.5149275 2.6732596
logFC.endothelial 9.002623 2.921363 1.7933635 2.4693736 2.6445033
logFC.quiescent_stellate 8.761964 2.902590 1.7659486 2.4574157 2.6707261

endothelial

PLVAP PECAM1 RGCC CD93 PODXL
p.value 0.000000 0.000000 0.000000 0.000000 0.000000
FDR 0.000000 0.000000 0.000000 0.000000 0.000000
summary.logFC 4.111690 2.703817 2.972217 2.277113 2.352657
logFC.beta 4.111690 2.715327 2.983010 2.330130 2.569880
logFC.alpha 4.110405 2.716458 3.017839 2.329922 2.567203
logFC.ductal 4.115947 2.715270 3.018544 2.328059 2.503218
logFC.acinar 4.110503 2.714780 3.013594 2.327695 2.352657
logFC.delta 4.104037 2.714132 3.014559 2.331779 2.571450
logFC.activated_stellate 4.091823 2.703817 2.971000 2.277113 2.555145
logFC.gamma 4.118904 2.712668 3.021770 2.331779 2.571450
logFC.quiescent_stellate 4.122417 2.714362 2.972217 2.294970 2.571450

quiescent_stellate

IL24 RGS5 NDUFA4L2 ADCY3 EPAS1
p.value 0.0000000 0.0000000 0.0000060 0.0014456 0.0105387
FDR 0.0000013 0.0000647 0.0399189 1.0000000 1.0000000
summary.logFC 2.2084315 1.7370203 1.4895637 1.3102146 1.2613258
logFC.beta 2.7258057 1.8047009 1.9270393 1.7778949 2.1628409
logFC.alpha 2.7308428 1.8120440 1.9290305 1.7581957 2.6028853
logFC.ductal 2.7323627 1.8111068 1.9314936 1.7698446 2.2945302
logFC.acinar 2.7044418 1.8158302 1.9246545 1.7763715 1.7709552
logFC.delta 2.7133656 1.8176394 1.9267207 1.7643182 2.2676807
logFC.activated_stellate 2.2084315 1.7471982 1.4895637 1.3102146 1.7934911
logFC.gamma 2.7314057 1.8257783 1.9096061 1.7759010 2.2976449
logFC.endothelial 2.6106708 1.7370203 1.7595018 1.6715952 1.2613258

Top marker gene expression plots

top_markers <- lapply(markers, function(x) rownames(x)[1])
for (cell_type in levels(sce$label)) {
    cat("\n\n###", cell_type, "\n")
    expression_plot <- dittoDimPlot(sce, top_markers[[cell_type]], reduction.use = "UMAP",
                                    size = 0.5, order = "increasing")
    print(expression_plot)
}

beta

alpha

ductal

acinar

delta

activated_stellate

gamma

endothelial

quiescent_stellate

Top marker gene density plots

top_markers <- lapply(markers, function(x) rownames(x)[1])
for (cell_type in levels(sce$label)) {
    cat("\n\n###", cell_type, "\n")
    density_plot <- plot_density(sce, top_markers[[cell_type]], size = 0.5)
    print(density_plot)
}

beta

alpha

ductal

acinar

delta

activated_stellate

gamma

endothelial

quiescent_stellate

Top marker gene violin plots

top_markers <- lapply(markers, function(x) rownames(x)[1])
for (cell_type in levels(sce$label)) {
    cat("\n\n###", cell_type, "\n")
    violin_plot <- dittoPlot(sce, top_markers[[cell_type]], group.by = "label",
                             plots = c("vlnplot"), vlnplot.lineweight = 0.5,
                             legend.show = FALSE)
    print(violin_plot)
}

beta

alpha

ductal

acinar

delta

activated_stellate

gamma

endothelial

quiescent_stellate

Top marker gene ridge plots

top_markers <- lapply(markers, function(x) rownames(x)[1])
for (cell_type in levels(sce$label)) {
    cat("\n\n###", cell_type, "\n")
    ridge_plot <- dittoPlot(sce, top_markers[[cell_type]], group.by = "label",
                            plots = c("ridgeplot"), ridgeplot.lineweight = 0.5,
                            legend.show = FALSE)
    print(ridge_plot)
}

beta

alpha

ductal

acinar

delta

activated_stellate

gamma

endothelial

quiescent_stellate

Top marker gene heatmaps

top_markers <- sapply(markers, function(x) rownames(x)[1])
# Average gene expression heatmap
plotGroupedHeatmap(sce, top_markers, group = "label", color = viridis(100),
                   cluster_rows = FALSE, cluster_cols = FALSE)

# Gene expression heatmap
dittoHeatmap(sce, top_markers, annot.by = "label", cluster_rows = TRUE)

Marker gene detection (binomial test)

Detecting marker genes

markers_binom <- findMarkers(sce, groups = sce$label, test.type = "binom",
                             pval.type = "all", direction = "up", lfc = 1)
markers_binom
## List of length 9
## names(9): beta alpha ductal acinar ... gamma endothelial quiescent_stellate
head(markers_binom[["alpha"]])
## DataFrame with 6 rows and 11 columns
##            p.value         FDR summary.logFC logFC.beta logFC.ductal
##          <numeric>   <numeric>     <numeric>  <numeric>    <numeric>
## LOXL4  5.19459e-12 8.67943e-08       3.33524    4.59809      1.96056
## VSTM2L 8.62552e-12 8.67943e-08       2.07174    4.08888      3.08191
## PTPRT  2.53066e-07 1.69765e-03       7.81038    5.81481      4.86468
## CRH    4.01467e-07 2.01988e-03       4.98534    6.44238      5.94797
## DPP4   1.44862e-06 5.83069e-03       2.84062    7.18042      3.46661
## IRX2   3.31187e-06 1.11086e-02       2.02938    5.21750      5.09508
##        logFC.acinar logFC.delta logFC.activated_stellate logFC.gamma
##           <numeric>   <numeric>                <numeric>   <numeric>
## LOXL4       6.75299     4.54195                  5.69174     3.33524
## VSTM2L      5.62136     2.07174                  5.44223     4.12535
## PTPRT       3.55933     4.98348                  4.52370     4.38147
## CRH         8.41771     2.30084                  5.60324     8.07133
## DPP4        5.95683     6.62961                  2.84062     8.22904
## IRX2        5.18530     5.49620                  3.32124     4.03097
##        logFC.endothelial logFC.quiescent_stellate
##                <numeric>                <numeric>
## LOXL4            3.82123                  5.93868
## VSTM2L           4.88695                  4.36412
## PTPRT            7.85509                  7.81038
## CRH              8.06966                  4.98534
## DPP4             8.22736                  8.18262
## IRX2             4.58759                  2.02938

Top marker for each cell type

top_markers_binom <- lapply(markers_binom, function(x) rownames(x)[1]) %>%
    unlist() %>%
    enframe()
top_markers_binom
name value
beta ADCYAP1
alpha LOXL4
ductal S100A14
acinar PNLIPRP2
delta LEPR
activated_stellate VCAN
gamma PPY2P
endothelial PLVAP
quiescent_stellate RGS5

Top marker gene tables

for (cell_type in levels(sce$label)) {
    cat("\n\n###", cell_type, "\n")
    markers_binom[[cell_type]] %>%
        as.data.frame() %>%
        head(n = 5) %>%
        t() %>%
        kable() %>%
        print()
}

beta

ADCYAP1 DLK1 MAFA RASGRF1 SNCB
p.value 0.000000 0.000000 0.000000 0.0000012 0.0000014
FDR 0.000000 0.000000 0.000000 0.0058331 0.0058331
summary.logFC 2.605743 2.910090 3.834355 2.9370523 3.1508492
logFC.alpha 4.613682 3.305541 5.155296 1.5669569 3.5755775
logFC.ductal 4.621828 2.641131 5.681751 5.3419522 4.7526096
logFC.acinar 4.331442 5.122187 6.732706 6.5446165 6.3372633
logFC.delta 4.730977 2.753637 4.126400 4.7940737 2.8958652
logFC.activated_stellate 4.733358 2.990814 4.316424 5.0028854 4.7956557
logFC.gamma 2.605743 2.910090 3.661336 4.8598734 3.1508492
logFC.endothelial 3.770981 4.122445 3.793691 2.9370523 4.0924907
logFC.quiescent_stellate 3.863969 4.253747 3.834355 4.3372237 5.0456052

alpha

LOXL4 VSTM2L PTPRT CRH DPP4
p.value 0.0000000 0.0000000 0.0000003 0.0000004 0.0000014
FDR 0.0000001 0.0000001 0.0016977 0.0020199 0.0058307
summary.logFC 3.3352383 2.0717381 7.8103810 4.9853441 2.8406211
logFC.beta 4.5980875 4.0888774 5.8148150 6.4423805 7.1804179
logFC.ductal 1.9605563 3.0819110 4.8646848 5.9479742 3.4666089
logFC.acinar 6.7529880 5.6213603 3.5593319 8.4177074 5.9568314
logFC.delta 4.5419465 2.0717381 4.9834762 2.3008428 6.6296054
logFC.activated_stellate 5.6917436 5.4422331 4.5237044 5.6032359 2.8406211
logFC.gamma 3.3352383 4.1253451 4.3814676 8.0713321 8.2290368
logFC.endothelial 3.8212305 4.8869468 7.8550854 8.0696604 8.2273645
logFC.quiescent_stellate 5.9386837 4.3641175 7.8103810 4.9853441 8.1826153

ductal

S100A14 LGALS4 MMP7 KRT19 SERPINA5
p.value 0.000000 0.000000 0.000000 0.000000 0.000000
FDR 0.000000 0.000000 0.000000 0.000000 0.000000
summary.logFC 5.579370 6.282444 5.008680 1.830028 5.363959
logFC.beta 7.398453 6.867289 6.307799 4.405118 7.182655
logFC.alpha 7.631980 6.640832 6.434857 2.933652 5.750874
logFC.acinar 2.680525 3.819441 4.491569 1.830028 2.644553
logFC.delta 7.782709 5.915455 5.568680 2.643936 6.108808
logFC.activated_stellate 6.978012 6.846581 5.662904 4.927773 5.122560
logFC.gamma 8.710902 5.436201 8.665484 3.902541 6.642750
logFC.endothelial 8.707657 6.713523 6.012898 5.155344 8.492210
logFC.quiescent_stellate 5.579370 6.282444 5.008680 4.422923 5.363959

acinar

PNLIPRP2 SYCN GP2 PNLIPRP1 KLK1
p.value 0.000000 0.000000 0.000000 0.000000 0.000000
FDR 0.000000 0.000000 0.000000 0.000000 0.000000
summary.logFC 6.946080 6.182907 6.891676 5.305069 5.275695
logFC.beta 5.823442 4.888465 5.734574 5.039633 5.806388
logFC.alpha 5.852755 4.860960 5.180492 4.902885 5.417338
logFC.ductal 4.878166 3.141897 3.935038 3.347164 2.868675
logFC.delta 6.168890 5.204918 5.643477 4.830774 5.293188
logFC.activated_stellate 9.174728 5.022933 5.543417 4.884847 6.799810
logFC.gamma 9.140702 9.197809 6.561007 5.826751 6.167380
logFC.endothelial 5.730711 4.695536 5.676300 5.063130 5.487128
logFC.quiescent_stellate 6.946080 6.182907 6.891676 5.305069 5.275695

delta

LEPR CBLN4 BCHE ANK1 ASB4
p.value 0.000000 0.0000000 0.0000001 0.0000263 0.0004166
FDR 0.000000 0.0000175 0.0008331 0.1323688 1.0000000
summary.logFC 3.392435 6.6086469 4.8155120 3.0569354 2.4974976
logFC.beta 4.566356 6.8139486 4.6424254 2.9466281 2.2289499
logFC.alpha 5.229584 3.6745526 7.7133381 4.4365274 3.3654061
logFC.ductal 4.211284 6.8859184 7.6179518 3.9706336 5.2693363
logFC.acinar 5.254759 6.7524838 7.5123905 6.1422843 5.8115655
logFC.activated_stellate 4.162390 5.4450635 4.1970828 3.0569354 5.0406565
logFC.gamma 5.900306 6.7525073 5.2325387 4.0221911 2.4974976
logFC.endothelial 3.392435 5.3185642 6.6489124 3.5370377 4.9143142
logFC.quiescent_stellate 6.184251 6.6086469 4.8155120 6.0023611 4.5101849

activated_stellate

VCAN SFRP2 PDGFRA GFPT2 PTGDS
p.value 0.000000 0.000000 0.000000 0.000000 0.000000
FDR 0.000000 0.000000 0.000000 0.000000 0.000000
summary.logFC 5.578232 5.196986 5.518722 3.905623 5.332674
logFC.beta 5.074107 7.534733 8.703680 6.213142 7.235279
logFC.alpha 8.251878 8.591337 9.235905 7.640321 6.747495
logFC.ductal 5.910352 7.543325 6.764882 5.426840 8.111420
logFC.acinar 6.107224 6.476158 6.082705 4.056524 6.421809
logFC.delta 8.392280 7.661683 7.680261 5.390206 6.187309
logFC.gamma 7.679641 6.704471 4.909126 6.259456 5.742599
logFC.endothelial 5.012284 5.669788 4.894274 4.632556 4.115721
logFC.quiescent_stellate 5.578232 5.196986 5.518722 3.905623 5.332674

gamma

PPY2P SERTM1 CARTPT SLC6A4 ENTPD2
p.value 0.0000000 0.0000000 0.0000001 0.0000003 0.0000004
FDR 0.0000028 0.0000028 0.0009272 0.0013126 0.0015598
summary.logFC 6.0174853 5.0316691 4.1441266 2.2998986 1.9045908
logFC.beta 8.0651926 7.8047854 6.5147791 4.1608520 3.9604319
logFC.alpha 7.1771993 3.0983977 6.8265106 2.2442363 1.9045908
logFC.ductal 7.6402024 7.1231442 5.7661959 5.8179414 2.2983190
logFC.acinar 7.5058972 7.6833112 7.4283355 7.7163185 4.7523168
logFC.delta 7.0063068 6.4075325 4.1853757 2.2998986 3.4948202
logFC.activated_stellate 6.3455661 5.5591467 5.3058565 5.5919567 5.7310132
logFC.endothelial 6.2583859 6.4345301 4.5907538 6.4673178 6.6062855
logFC.quiescent_stellate 6.0174853 5.0316691 4.1441266 4.4293564 5.2030796

endothelial

PLVAP PECAM1 KDR PCAT19 FLT1
p.value 0.000000 0.000000 0.000000 0.000000 0.000000
FDR 0.000000 0.000000 0.000000 0.000000 0.000000
summary.logFC 7.691771 6.469591 7.384749 7.377452 5.720794
logFC.beta 6.310873 7.894593 9.453236 8.672246 8.107304
logFC.alpha 6.830492 8.305262 9.978239 8.558117 7.277281
logFC.ductal 7.098564 7.471288 7.864053 7.222254 7.119260
logFC.acinar 6.599252 7.312203 8.182390 7.347934 7.481147
logFC.delta 5.808388 6.981283 8.385494 7.604473 7.236684
logFC.activated_stellate 5.289146 5.222857 6.758953 6.751646 5.905582
logFC.gamma 6.949415 6.883140 7.637875 7.630571 6.767659
logFC.quiescent_stellate 7.691771 6.469591 7.384749 7.377452 5.720794

quiescent_stellate

RGS5 SLIT3 ANGPT1 TRPC6 CCDC3
p.value 0.000000 0.0000000 0.0000012 0.0000017 0.0000043
FDR 0.000000 0.0001562 0.0083006 0.0085468 0.0175079
summary.logFC 3.522647 3.5414473 4.3445466 4.3123597 2.7888628
logFC.beta 4.985819 8.1561608 7.7247701 7.2098526 7.1497795
logFC.alpha 5.794426 7.6107466 7.6147737 6.6646816 6.8068639
logFC.ductal 5.985049 7.0073398 6.6226867 6.0645573 6.5815102
logFC.acinar 5.949036 6.8546779 6.4798971 6.5818754 6.8800754
logFC.delta 5.984671 5.7863925 5.9401396 5.3244656 7.0057219
logFC.activated_stellate 3.429824 4.8445688 4.3445466 4.4449236 3.0761048
logFC.gamma 7.211424 3.5414473 5.1042882 4.3253957 6.1600463
logFC.endothelial 3.522647 4.3052975 5.0944362 4.3123597 2.7888628

Top marker gene dot plot

top_markers_binom <- sapply(markers_binom, function(x) rownames(x)[1])
dittoDotPlot(sce, top_markers_binom, group.by = "label")

Top marker gene heatmap

top_markers_binom <- sapply(markers_binom, function(x) rownames(x)[1])
dittoHeatmap(sce, top_markers_binom, annot.by = "label",
             scaled.to.max = TRUE, cluster_rows = FALSE)

Session Info

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] viridis_0.6.1               viridisLite_0.4.0          
##  [3] pheatmap_1.0.12             Nebulosa_1.2.0             
##  [5] patchwork_1.1.1             dittoSeq_1.4.3             
##  [7] PCAtools_2.4.0              ggrepel_0.9.1              
##  [9] scran_1.20.1                scater_1.20.1              
## [11] scuttle_1.2.1               scRNAseq_2.6.1             
## [13] SingleCellExperiment_1.14.1 SummarizedExperiment_1.22.0
## [15] Biobase_2.52.0              GenomicRanges_1.44.0       
## [17] GenomeInfoDb_1.28.4         IRanges_2.26.0             
## [19] S4Vectors_0.30.2            BiocGenerics_0.38.0        
## [21] MatrixGenerics_1.4.3        matrixStats_0.61.0         
## [23] Matrix_1.3-4                knitr_1.36                 
## [25] glue_1.4.2                  forcats_0.5.1              
## [27] stringr_1.4.0               dplyr_1.0.7                
## [29] purrr_0.3.4                 readr_2.0.2                
## [31] tidyr_1.1.4                 tibble_3.1.5               
## [33] ggplot2_3.3.5               tidyverse_1.3.1            
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3                rtracklayer_1.52.1           
##   [3] scattermore_0.7               SeuratObject_4.0.2           
##   [5] bit64_4.0.5                   irlba_2.3.3                  
##   [7] DelayedArray_0.18.0           rpart_4.1-15                 
##   [9] data.table_1.14.2             KEGGREST_1.32.0              
##  [11] RCurl_1.98-1.5                AnnotationFilter_1.16.0      
##  [13] generics_0.1.0                GenomicFeatures_1.44.2       
##  [15] ScaledMatrix_1.0.0            cowplot_1.1.1                
##  [17] RSQLite_2.2.8                 RANN_2.6.1                   
##  [19] future_1.22.1                 bit_4.0.4                    
##  [21] tzdb_0.1.2                    spatstat.data_2.1-0          
##  [23] xml2_1.3.2                    lubridate_1.8.0              
##  [25] httpuv_1.6.3                  assertthat_0.2.1             
##  [27] xfun_0.26                     hms_1.1.1                    
##  [29] jquerylib_0.1.4               evaluate_0.14                
##  [31] promises_1.2.0.1              fansi_0.5.0                  
##  [33] restfulr_0.0.13               progress_1.2.2               
##  [35] dbplyr_2.1.1                  readxl_1.3.1                 
##  [37] igraph_1.2.6                  DBI_1.1.1                    
##  [39] htmlwidgets_1.5.4             spatstat.geom_2.3-0          
##  [41] ellipsis_0.3.2                ks_1.13.2                    
##  [43] backports_1.2.1               biomaRt_2.48.3               
##  [45] deldir_1.0-5                  sparseMatrixStats_1.4.2      
##  [47] vctrs_0.3.8                   ensembldb_2.16.4             
##  [49] ROCR_1.0-11                   abind_1.4-5                  
##  [51] cachem_1.0.6                  withr_2.4.2                  
##  [53] sctransform_0.3.2             GenomicAlignments_1.28.0     
##  [55] prettyunits_1.1.1             mclust_5.4.7                 
##  [57] goftest_1.2-3                 cluster_2.1.2                
##  [59] ExperimentHub_2.0.0           lazyeval_0.2.2               
##  [61] crayon_1.4.1                  labeling_0.4.2               
##  [63] edgeR_3.34.1                  pkgconfig_2.0.3              
##  [65] nlme_3.1-152                  vipor_0.4.5                  
##  [67] ProtGenerics_1.24.0           rlang_0.4.11                 
##  [69] globals_0.14.0                lifecycle_1.0.1              
##  [71] miniUI_0.1.1.1                filelock_1.0.2               
##  [73] BiocFileCache_2.0.0           modelr_0.1.8                 
##  [75] rsvd_1.0.5                    AnnotationHub_3.0.1          
##  [77] cellranger_1.1.0              polyclip_1.10-0              
##  [79] lmtest_0.9-38                 zoo_1.8-9                    
##  [81] reprex_2.0.1                  beeswarm_0.4.0               
##  [83] ggridges_0.5.3                png_0.1-7                    
##  [85] rjson_0.2.20                  bitops_1.0-7                 
##  [87] KernSmooth_2.23-20            Biostrings_2.60.2            
##  [89] blob_1.2.2                    DelayedMatrixStats_1.14.3    
##  [91] parallelly_1.28.1             beachmat_2.8.1               
##  [93] scales_1.1.1                  memoise_2.0.0                
##  [95] magrittr_2.0.1                plyr_1.8.6                   
##  [97] ica_1.0-2                     zlibbioc_1.38.0              
##  [99] compiler_4.1.0                dqrng_0.3.0                  
## [101] BiocIO_1.2.0                  RColorBrewer_1.1-2           
## [103] fitdistrplus_1.1-6            Rsamtools_2.8.0              
## [105] cli_3.0.1                     XVector_0.32.0               
## [107] listenv_0.8.0                 pbapply_1.5-0                
## [109] mgcv_1.8-36                   MASS_7.3-54                  
## [111] tidyselect_1.1.1              stringi_1.7.5                
## [113] highr_0.9                     yaml_2.2.1                   
## [115] BiocSingular_1.8.1            locfit_1.5-9.4               
## [117] grid_4.1.0                    sass_0.4.0                   
## [119] tools_4.1.0                   future.apply_1.8.1           
## [121] rstudioapi_0.13               bluster_1.2.1                
## [123] metapod_1.0.0                 gridExtra_2.3                
## [125] farver_2.1.0                  Rtsne_0.15                   
## [127] digest_0.6.28                 BiocManager_1.30.16          
## [129] pracma_2.3.3                  shiny_1.7.1                  
## [131] Rcpp_1.0.7                    broom_0.7.9                  
## [133] BiocVersion_3.13.1            later_1.3.0                  
## [135] RcppAnnoy_0.0.19              httr_1.4.2                   
## [137] AnnotationDbi_1.54.1          colorspace_2.0-2             
## [139] rvest_1.0.1                   XML_3.99-0.8                 
## [141] fs_1.5.0                      tensor_1.5                   
## [143] reticulate_1.22               splines_4.1.0                
## [145] uwot_0.1.10                   statmod_1.4.36               
## [147] spatstat.utils_2.2-0          plotly_4.10.0                
## [149] xtable_1.8-4                  jsonlite_1.7.2               
## [151] R6_2.5.1                      pillar_1.6.3                 
## [153] htmltools_0.5.2               mime_0.12                    
## [155] fastmap_1.1.0                 BiocParallel_1.26.2          
## [157] BiocNeighbors_1.10.0          interactiveDisplayBase_1.30.0
## [159] codetools_0.2-18              mvtnorm_1.1-3                
## [161] utf8_1.2.2                    lattice_0.20-44              
## [163] bslib_0.3.1                   spatstat.sparse_2.0-0        
## [165] curl_4.3.2                    ggbeeswarm_0.6.0             
## [167] leiden_0.3.9                  survival_3.2-11              
## [169] limma_3.48.3                  rmarkdown_2.11               
## [171] munsell_0.5.0                 GenomeInfoDbData_1.2.6       
## [173] haven_2.4.3                   reshape2_1.4.4               
## [175] gtable_0.3.0                  spatstat.core_2.3-0          
## [177] Seurat_4.0.4